曾意儒 Yi-Ju Tseng
“In brief, the grammar tells us that a statistical graphic is a mapping from data to aesthetic attributes (colour, shape, size) of geometric objects (points, lines, bars). The plot may also contain statistical transformations of the data and is drawn on a specific coordinate system”
-from ggplot2 book
做圖的文法包括兩個最主要元素
其他元素
# qplot(x-axis variable, y-axis variable, data = dataset): scatter plot.
library(SportsAnalytics)
# Fetch the 2015-16 NBA player statistics used by the qplot examples.
NBA1516 <- fetch_NBAPlayerStatistics("15-16")
library(ggplot2)
qplot(
  x = FieldGoalsAttempted, y = TotalPoints,
  data = NBA1516
)
# color = Position: colour the points by fielding position (Position).
qplot(
  x = FieldGoalsAttempted, y = TotalPoints,
  data = NBA1516, color = Position
)
# geom = c("point", "smooth"): draw the points plus a smoothed trend line.
qplot(
  x = FieldGoalsAttempted, y = TotalPoints,
  data = NBA1516,
  geom = c("point", "smooth")
)
# Only x is supplied here; fill = Position colours the result by fielding
# position (Position).
qplot(
  x = TotalPoints,
  fill = Position,
  data = NBA1516
)
facets = 用來設定子圖分類的依據。參數格式為 facets = 直向分類~橫向分類,不需分類的方向(直向或橫向)可用 . 表示
# facets = . ~ Position: one panel per fielding position, laid out
# horizontally (Position on the column side of the formula).
qplot(
  x = FieldGoalsAttempted,
  y = TotalPoints,
  facets = . ~ Position,
  data = NBA1516
)
facets = 直向分類~橫向分類
# facets = Position ~ .: one panel per fielding position, stacked vertically
# (Position on the row side of the formula).
qplot(
  x = FieldGoalsAttempted,
  y = TotalPoints,
  facets = Position ~ .,
  data = NBA1516
)
# Setting the binwidth parameter.
# binwidth = 2: each bin spans 2 points.
qplot(
  x = TotalPoints, data = NBA1516,
  facets = Position ~ ., binwidth = 2
)
# Position ~ .: one panel per fielding position, stacked vertically.
# binwidth = 100: each bin spans 100 points.
qplot(
  x = TotalPoints, data = NBA1516,
  facets = Position ~ ., binwidth = 100
)
qplot()提供快速方便的畫圖功能。ggplot()函式:使用ggplot2作圖有以下步驟:
# Aesthetics are specified with aes(x, y, ...). Geometric objects named in the
# notes: geom_point(), geom_line(), geom_polygon(), geom_errorbar().
library(ggplot2)
## Install first if needed: install.packages("ggplot2")
ggplot(iris, aes(x = Species, y = Sepal.Length)) +
  geom_point()
用geom_boxplot()改畫盒狀圖
# Box plot of Sepal.Length for each Species.
ggplot(
  data = iris,
  mapping = aes(x = Species, y = Sepal.Length)
) +
  geom_boxplot()
直向分類~橫向分類
# Scatter plot split into one row of panels per Species (Species ~ .).
ggplot(
  data = iris,
  mapping = aes(x = Sepal.Width, y = Sepal.Length)
) +
  geom_point() +
  facet_grid(Species ~ .)
直向分類~橫向分類
# Scatter plot split into one column of panels per Species (. ~ Species).
ggplot(
  data = iris,
  mapping = aes(x = Sepal.Width, y = Sepal.Length)
) +
  geom_point() +
  facet_grid(. ~ Species)
替xy散佈圖加上趨勢線
# Faceted scatter plot with a trend line added by geom_smooth() using its
# default settings.
ggplot(
  data = iris,
  mapping = aes(x = Sepal.Width, y = Sepal.Length)
) +
  geom_point() +
  facet_grid(Species ~ .) +
  geom_smooth()
替xy散佈圖加上趨勢線,使用linear regression
# Faceted scatter plot with a trend line; method = 'lm' is passed to
# geom_smooth() to request a linear-regression fit.
ggplot(
  data = iris,
  mapping = aes(x = Sepal.Width, y = Sepal.Length)
) +
  geom_point() +
  facet_grid(Species ~ .) +
  geom_smooth(method = 'lm')
改用geom_line()畫線
# Same data drawn with geom_line() instead of points, one row per Species.
ggplot(
  data = iris,
  mapping = aes(x = Sepal.Width, y = Sepal.Length)
) +
  geom_line() +
  facet_grid(Species ~ .)
改用顏色分組,使用aes(color='group name')
# Group by colour instead of facets: color = Species inside aes() draws one
# coloured line per species in a single panel.
ggplot(
  data = iris,
  mapping = aes(
    x = Sepal.Width,
    y = Sepal.Length,
    color = Species
  )
) +
  geom_line()
誤差值的計算有下列三種選擇:
# Bar chart with geom_bar().
library(datasets)
library(dplyr)
# Convert Month to a factor.
airquality$Month <- as.factor(airquality$Month)
# Mean Ozone per month. The summary column must be called OzoneMean because
# the plot below maps y to OzoneMean.
airquality.mean <- airquality %>%
  group_by(Month) %>%
  summarise(OzoneMean = mean(Ozone, na.rm = TRUE))
# stat = "identity": plot the supplied values directly.
ggplot() +
  geom_bar(
    data = airquality.mean,
    aes(x = Month, y = OzoneMean),
    stat = "identity"
  )
# Error bars with the geom_errorbar() function.
# Mean and standard deviation of Ozone per month.
# NOTE(review): the chained assignment also overwrites airquality.mean with
# this two-column summary; kept as in the original -- confirm it is intended.
airquality.stat <- airquality.mean <- airquality %>%
  group_by(Month) %>%
  summarise(
    OzoneMean = mean(Ozone, na.rm = TRUE),
    OzoneSD = sd(Ozone, na.rm = TRUE)
  )
ggplot(data = airquality.stat) +
  geom_bar(
    aes(x = Month, y = OzoneMean),
    stat = "identity"
  ) +
  geom_errorbar(
    # ymin: lower end of the bar, ymax: upper end (mean -/+ one SD).
    aes(
      x = Month,
      ymin = OzoneMean - OzoneSD,
      ymax = OzoneMean + OzoneSD
    ),
    width = .1
  )
# Choropleth maps are drawn with the choroplethr package (described in the
# notes as a ggplot2-based choropleth tool) together with choroplethrMaps.
## Install before first use: only packages that are not yet installed are
## fetched, so re-running this snippet does not reinstall them.
choroplethrPkgs <- c("choroplethr", "choroplethrMaps")
choroplethrMissing <- choroplethrPkgs[
  !vapply(choroplethrPkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(choroplethrMissing) > 0) {
  install.packages(choroplethrMissing)
}
library(choroplethr)
# Draw the US population distribution with the state_choropleth() function.
data(df_pop_state) # data set recording the population of each state
# Plot each state's population on the map.
state_choropleth(df_pop_state)
若將reference_map設定為TRUE,可在面量圖的背景加上google地圖
# Load the continental_us_states data set and pass it as the zoom argument;
# reference_map = TRUE requests a reference map behind the choropleth.
data(continental_us_states)
state_choropleth(
  df_pop_state,
  zoom = continental_us_states,
  reference_map = TRUE
)
# ggmap: described in the notes as developed on top of the ggplot2 package.
## Install before first use: skipped when ggmap is already installed.
if (!requireNamespace("ggmap", quietly = TRUE)) {
  install.packages("ggmap")
}
get_googlemap()函式取得google map圖層
# The ggmap() function draws the layer that get_googlemap() fetched.
library(ggmap)
# get_googlemap() names its location argument `center`; a character value is
# geocoded, so "Taiwan" centres the map on Taiwan.
twmap <- get_googlemap(
  center = "Taiwan",
  zoom = 7,
  language = "zh-TW"
)
ggmap(twmap)
# Combining the ggmap package with other data sources.
library(jsonlite)
library(RCurl)
# Download the Taipei open-data JSON and keep the result table.
WaterData <- fromJSON(getURL("http://data.taipei/opendata/datalist/apiAccess?scope=resourceAquire&rid=190796c8-7c56-42e0-8068-39242b8ec927"))
WaterDataFrame <- WaterData$result$results
# Convert the coordinate and qua_cntu columns to numeric.
WaterDataFrame$longitude <- as.numeric(WaterDataFrame$longitude)
WaterDataFrame$latitude <- as.numeric(WaterDataFrame$latitude)
WaterDataFrame$qua_cntu <- as.numeric(WaterDataFrame$qua_cntu)
# Keep rows with a non-negative qua_cntu. NA values (e.g. produced by
# as.numeric() on non-numeric text) are excluded explicitly; indexing with an
# NA logical would otherwise insert all-NA rows.
validReading <- !is.na(WaterDataFrame$qua_cntu) & WaterDataFrame$qua_cntu >= 0
WaterDataClean <- WaterDataFrame[validReading, ]
head(WaterDataClean)
_id update_date update_time qua_id code_name
1 1 2018-05-03 00:00:00 CS00 雙溪淨水場
2 2 2018-05-03 00:00:00 CS01 衛理女中
3 3 2018-05-03 00:00:00 CS02 雙溪國小
4 4 2018-05-03 00:00:00 CS03 華興加壓站
5 5 2018-05-03 00:00:00 CX00 長興淨水場
6 6 2018-05-03 00:00:00 CX02 市政大樓
longitude latitude qua_cntu qua_cl qua_ph
1 121.5609 25.11574 0.02 0.59 7.6
2 121.5440 25.10325 0.09 0.37 7.6
3 121.5556 25.10763 0.07 0.46 7.6
4 121.5348 25.10356 0.12 0.53 7.4
5 121.5404 25.01633 0.03 0.58 7.2
6 121.5566 25.04250 0.05 0.57 7.1
library(ggmap)
# get_googlemap() expects a single lon/lat `center`, not a bounding box.
# The centre below is the midpoint of the original box
# (lon 121.43-121.62, lat 24.93-25.19).
TaipeiMap <- get_googlemap(
  center = c(lon = 121.525, lat = 25.06),
  zoom = 11, maptype = 'roadmap'
)
# Colour each measurement point by qua_cntu (yellow = low, red = high).
# The point size is a constant, so it is set outside aes(); no size legend is
# created and none has to be hidden.
TaipeiMapO <- ggmap(TaipeiMap) +
  geom_point(
    data = WaterDataClean,
    aes(x = longitude, y = latitude, color = qua_cntu),
    size = 3.5
  ) +
  scale_color_continuous(low = "yellow", high = "red")
TaipeiMapO
ggmap套件提供多種地圖型態,使用者可透過設定maptype自行選擇適合的地圖樣式,樣式有:
透過設定extent參數可將地圖輸出樣式改為滿版
library(ggmap)
# extent = "device": the map fills the whole output area.
ggmap(
  TaipeiMap,
  extent = "device"
)
透過設定extent參數可將地圖輸出樣式改為滿版
(ggplot2 + ggmap)取得美國各州中心座標資料以及美國各州人口資料
# Build a table of US state centre coordinates: lower-cased state name plus
# the x (lon) and y (lat) components of state.center.
StateCenter <- data.frame(
  region = tolower(state.name),
  lon = state.center[["x"]],
  lat = state.center[["y"]]
)
head(StateCenter, n = 1)
region lon lat
1 alabama -86.7509 32.5901
取得美國各州中心座標資料以及美國各州人口資料
# US state population data: join the population table with the state centre
# coordinates on the shared "region" column.
StatePop <- merge(
  x = df_pop_state,
  y = StateCenter,
  by = "region"
)
head(StatePop, n = 1)
region value lon lat
1 alabama 4777326 -86.7509 32.5901
# Turn every 1,000,000 residents into one point: each StatePop row is repeated
# floor(value / 1e6) times. A state below one million still contributes one
# row, which is what the original 1:(value / 1e6) loop produced.
# Rows are replicated in one vectorised step instead of growing the data
# frame with rbind() inside a nested loop.
pointsPerState <- pmax(1, floor(StatePop[, "value"] / 1000000))
PopPoint <- StatePop[rep(seq_len(nrow(StatePop)), times = pointsPerState), ]
# Reset row names to 1..n so the printed result matches the sequential
# numbering of the loop-built table.
rownames(PopPoint) <- NULL
head(PopPoint, 3)
region value lon lat
1 alabama 4777326 -86.7509 32.5901
2 alabama 4777326 -86.7509 32.5901
3 alabama 4777326 -86.7509 32.5901
# get_googlemap() names its location argument `center`; a character value is
# geocoded.
USMap <- get_googlemap(center = "United States", zoom = 4)
# Overlay 2-D density contours and filled density polygons of the population
# points on the base map; fill and transparency both follow the density level.
# guide = "none" hides the legends.
densityMap <- ggmap(USMap, extent = "device") +
  geom_density2d(
    data = PopPoint,
    aes(x = lon, y = lat),
    size = 0.3
  ) +
  stat_density2d(
    data = PopPoint,
    aes(
      x = lon, y = lat,
      fill = ..level.., alpha = ..level..
    ),
    size = 0.01, bins = 16, geom = "polygon"
  ) +
  scale_fill_gradient(
    low = "green",
    high = "red", guide = "none"
  ) +
  scale_alpha(range = c(0, 0.3), guide = "none")
densityMap
shapefile(.shp):使用shapefile與ggplot2畫圖的步驟如下:
# Steps listed in the notes: process the shapefile with the rgdal, rgeos and
# maptools packages, then draw it with ggplot2 & RColorBrewer.
library(ggplot2)
library(rgdal) # for fortify()
library(rgeos) # for fortify()
library(maptools) # for readShapeSpatial()
# Name of the spatial data file; download it yourself.
tw_new <- readShapeSpatial("Taiwan/Town_MOI_1041215.shp")
head(tw_new$Town_ID)
[1] 1001402 1001321 1000913 1001411 1001416 1000712
# Convert the shapefile object into a data.frame, using T_UID as the region id.
tw_new.df <- fortify(tw_new, region = "T_UID")
head(tw_new.df, 10)
long lat order hole piece id group
1 119.9170 26.17518 1 FALSE 1 1 1.1
2 119.9171 26.17517 2 FALSE 1 1 1.1
3 119.9171 26.17518 3 FALSE 1 1 1.1
4 119.9171 26.17518 4 FALSE 1 1 1.1
5 119.9171 26.17518 5 FALSE 1 1 1.1
6 119.9172 26.17518 6 FALSE 1 1 1.1
7 119.9172 26.17518 7 FALSE 1 1 1.1
8 119.9172 26.17518 8 FALSE 1 1 1.1
9 119.9173 26.17515 9 FALSE 1 1 1.1
10 119.9173 26.17515 10 FALSE 1 1 1.1
# Build a fake data set to plot.
# The prevalence column `pre` is random: rnorm(number of values needed),
# one value per T_UID.
mydata <- data.frame(
  NAME_2 = tw_new$T_Name,
  id = tw_new$T_UID,
  pre = rnorm(length(tw_new$T_UID)),
  # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)
head(mydata)
NAME_2 id pre
1 \xa6\xa8\xa5\\\xc2\xed 178 1.0551637
2 \xa8ΥV\xb6m 164 -0.6307466
3 \xb3\xc1\xbcd\xb6m 118 -1.2255327
4 \xba\xf1\xaeq\xb6m 376 0.1314583
5 \xc4\xf5\xc0\xac\xb6m 369 1.3665832
6 \xa5Ф\xa4\xc2\xed 78 -0.3132549
利用iconv將不知所以然的代碼(\xa6\xa8\xa5\xc2\xed)轉為看得懂的中文
# Re-encode the names from big5 to UTF-8 (the original note says NAME_2 was
# a factor to begin with).
mydata[["NAME_2"]] <- iconv(
  mydata[["NAME_2"]],
  from = "big5",
  to = "UTF-8"
)
head(mydata, n = 10)
NAME_2 id pre
1 成功鎮 178 1.0551637
2 佳冬鄉 164 -0.6307466
3 麥寮鄉 118 -1.2255327
4 綠島鄉 376 0.1314583
5 蘭嶼鄉 369 1.3665832
6 田中鎮 78 -0.3132549
7 社頭鄉 83 1.2072224
8 竹田鄉 157 0.7312959
9 萬丹鄉 148 1.4849184
10 三灣鄉 64 0.6094254
將有prevalence的假數據mydata和經緯度資料tw_new.df合併, 用merge()
# Join the polygon coordinates with the fake prevalence data on "id".
# all.x = TRUE keeps every coordinate row even when no mydata row matches.
final.plot <- merge(
  tw_new.df,
  mydata,
  by = "id", all.x = TRUE
)
head(final.plot, 10)
id long lat order hole piece group NAME_2 pre
1 1 119.9170 26.17518 1 FALSE 1 1.1 南竿鄉 0.9584632
2 1 119.9171 26.17517 2 FALSE 1 1.1 南竿鄉 0.9584632
3 1 119.9171 26.17518 3 FALSE 1 1.1 南竿鄉 0.9584632
4 1 119.9171 26.17518 4 FALSE 1 1.1 南竿鄉 0.9584632
5 1 119.9171 26.17518 5 FALSE 1 1.1 南竿鄉 0.9584632
6 1 119.9172 26.17518 6 FALSE 1 1.1 南竿鄉 0.9584632
7 1 119.9172 26.17518 7 FALSE 1 1.1 南竿鄉 0.9584632
8 1 119.9172 26.17518 8 FALSE 1 1.1 南竿鄉 0.9584632
9 1 119.9173 26.17515 9 FALSE 1 1.1 南竿鄉 0.9584632
10 1 119.9173 26.17515 10 FALSE 1 1.1 南竿鄉 0.9584632
library(RColorBrewer) # palette: brewer.pal(9, "Reds")
# Choropleth of the fake prevalence values. The fill column is `pre`, the name
# given to it when mydata was built; final.plot has no `prevalence` column.
twcmap <- ggplot() +
  geom_polygon(
    data = final.plot,
    aes(
      x = long, y = lat,
      group = group,
      fill = pre
    ),
    color = "black",
    size = 0.25
  ) +
  coord_map() + # keep the map proportions
  scale_fill_gradientn(
    colours = brewer.pal(9, "Reds")
  ) +
  theme_void() +
  labs(title = "Prevalence of X in Taiwan")
twcmap
library(ggmap)
# get_googlemap() names its location argument `center`; a character value is
# geocoded, so "Taiwan" centres the map on Taiwan.
twmap <- get_googlemap(
  center = "Taiwan",
  zoom = 7,
  language = "zh-TW"
)
ggmap(twmap) + # base map layer
  geom_polygon(
    data = final.plot, # choropleth layer
    aes(
      x = long, y = lat,
      group = group, fill = pre
    ),
    color = "grey80", size = 0.1,
    alpha = 0.5
  ) +
  scale_fill_gradientn(
    colours = brewer.pal(9, "Reds")
  )
# Heatmap drawn with geom_tile().
# Read the .csv file.
nba <- read.csv("http://datasets.flowingdata.com/ppg2008.csv")
head(nba, 3)
Name G MIN PTS FGM FGA FGP FTM FTA FTP X3PM X3PA X3PP
1 Dwyane Wade 79 38.6 30.2 10.8 22.0 0.491 7.5 9.8 0.765 1.1 3.5 0.317
2 LeBron James 81 37.7 28.4 9.7 19.9 0.489 7.3 9.4 0.780 1.6 4.7 0.344
3 Kobe Bryant 82 36.2 26.8 9.8 20.9 0.467 5.9 6.9 0.856 1.4 4.1 0.351
ORB DRB TRB AST STL BLK TO PF
1 1.1 3.9 5.0 7.5 2.2 1.3 3.4 2.3
2 1.3 6.3 7.6 7.2 1.7 1.1 3.0 1.7
3 1.1 4.1 5.2 4.9 1.5 0.5 2.6 2.3
為了做圖,將寬表轉長表
library(reshape2) # provides melt()
# Reshape the wide table into long form, keeping Name as the identifier.
nba.m <- melt(
  data = nba,
  id.vars = "Name"
)
head(nba.m, n = 5)
| Name | variable | value |
|---|---|---|
| Dwyane Wade | G | 79 |
| LeBron James | G | 81 |
| Kobe Bryant | G | 82 |
| Dirk Nowitzki | G | 81 |
| Danny Granger | G | 67 |
將Geometric objects指定為geom_tile()
library(ggplot2) # provides ggplot()
# One tile per (variable, Name) pair, filled by value on a white-to-steelblue
# gradient, with white tile borders.
ggplot(data = nba.m, mapping = aes(x = variable, y = Name)) +
  geom_tile(
    mapping = aes(fill = value),
    colour = "white"
  ) +
  scale_fill_gradient(low = "white", high = "steelblue")
# Standardise every column except Name with scale().
# mutate(across(...)) replaces the deprecated mutate_each(funs(...)) form.
library(dplyr)
nba.s <- nba %>%
  mutate(across(-Name, scale))
head(nba.s, 2)
| Name | G | MIN | PTS | FGM | FGA | FGP | FTM | FTA | FTP | X3PM | X3PA | X3PP | ORB | DRB | TRB | AST | STL | BLK | TO | PF |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Dwyane Wade | 0.6179300 | 1.0019702 | 3.179941 | 2.920022 | 2.596832 | 0.5136017 | 1.917475 | 2.110772 | -0.7401673 | -0.1080044 | 0.1303647 | -0.15749098 | -0.27213551 | -0.3465676 | -0.3287465 | 1.652247 | 2.558238 | 1.2064646 | 1.790445 | -0.2984568 |
| LeBron James | 0.7693834 | 0.6119299 | 2.566974 | 1.957185 | 1.697237 | 0.4649190 | 1.778729 | 1.896589 | -0.5233214 | 0.4920201 | 0.6971679 | 0.02738974 | -0.06117775 | 1.0080940 | 0.6605370 | 1.516147 | 1.367252 | 0.8627425 | 1.059651 | -1.3903719 |
# Wide to long; id.vars is given explicitly, as in the earlier
# melt(nba, id.vars = "Name") call, instead of relying on melt() guessing it.
nba.s.m <- melt(nba.s, id.vars = "Name")
# Heatmap of the standardised values.
ggplot(nba.s.m, aes(variable, Name)) +
  geom_tile(
    aes(fill = value),
    colour = "white"
  ) +
  scale_fill_gradient(
    low = "white", high = "steelblue"
  )
# Treemaps with the treemap package.
library(treemap)
# Load the GNI2014 data set and show its first rows as a table.
data(GNI2014)
knitr::kable(head(GNI2014))
| iso3 | country | continent | population | GNI | |
|---|---|---|---|---|---|
| 3 | BMU | Bermuda | North America | 67837 | 106140 |
| 4 | NOR | Norway | Europe | 4676305 | 103630 |
| 5 | QAT | Qatar | Asia | 833285 | 92200 |
| 6 | CHE | Switzerland | Europe | 7604467 | 88120 |
| 7 | MAC | Macao SAR, China | Asia | 559846 | 76270 |
| 8 | LUX | Luxembourg | Europe | 491775 | 75990 |
library(treemap)
data(GNI2014)
# index groups the tiles (continent, then iso3), vSize sets tile size from
# population, vColor sets colour intensity from GNI.
treemap(
  GNI2014,
  index = c("continent", "iso3"),
  vSize = "population",
  vColor = "GNI",
  type = "value"
)